/*

	Program to construct the pupil data tables for the "Making the Grade" paper
	
*/



#delimit;
*set trace on;




	
/*
	run_regs: estimate one table column and write it into the results workbook.

	Options (all required, all passed as strings)
		outcome      outcome stub. EL_ + stub is the dependent variable and,
		             for ANCOVA, BL_ + stub is the baseline control.
		outcome_raw  stub of the variable (EL_ + stub) that is summarised for
		             the control-arm mean and SD.
		test         exam label, used only to build the workbook file name.
		type         ANCOVA adds the baseline outcome and a missing-baseline
		             dummy as controls. Any other value adds no controls.
		sample       No_VHP_Cell, Males or Females restrict the estimation
		             sample. Any other value uses every observation.
		col          column offset. char(col+65) is the Excel column letter,
		             so 1 maps to column B.

	Reads the globals dir, year and S_DATE for the workbook path. The local
	macro named version is not set inside this program, so it expands to
	nothing in that path.

	The data in memory are restored before the program exits, except that
	mBL_outcome and _BL_outcome are left behind after an ANCOVA run.
*/
capture program drop run_regs;
program define run_regs;
syntax, outcome(string) outcome_raw(string) test(string) type(string) sample(string) col(string);

	quietly{;

		local regressors MT_Program CCT_Program;

		*optional sample restriction that is appended to the estimation commands;
		*NOTE(review): the qualifier is passed after the options behind a second comma - confirm this placement parses as intended;
		local ifstatement;
		if "`sample'"=="No_VHP_Cell"{;
			local ifstatement if group !=11;
		};
		if "`sample'"=="Males"{;
			local ifstatement if Female ==0;
		};
		if "`sample'"=="Females"{;
			local ifstatement if Female ==1;
		};

		*baseline controls, only for the ANCOVA specification;
		*missing baselines are set to zero and flagged by a dummy;
		local ctrls;
		capture drop mBL_outcome;
		capture drop _BL_outcome;
		local prefix EL_;

		if "`type'" == "ANCOVA"{;
			gen mBL_outcome = mi(BL_`outcome');
			gen _BL_outcome = BL_`outcome';
			replace _BL_outcome = 0 if mi(BL_`outcome');
			local ctrls mBL_outcome _BL_outcome;
		};

		*randomization inference;
		*one ritest run per pairwise comparison, 1000 permutations of Study_Arm;
		*clustered on School_Code and stratified on the matching group_ variable;
			local ri_pvals C_vs_FC C_vs_RC RC_vs_FC;
			foreach ri_pval of local ri_pvals{;
				preserve;
					keep Study_Arm Female group group_`ri_pval' `prefix'`outcome' `ctrls' School_Code;
					local treat_est;
					if "`ri_pval'"=="C_vs_FC"{;
						local treat_est _b[2.Study_Arm];
					};
					if "`ri_pval'"=="C_vs_RC"{;
						local treat_est _b[1.Study_Arm];
					};
					if "`ri_pval'"=="RC_vs_FC"{;
						local treat_est (_b[2.Study_Arm]-_b[1.Study_Arm]);
					};
					ritest Study_Arm `treat_est', reps(1000) seed(100453) cluster(School_Code) strata(group_`ri_pval') noanalytics:
						areg `prefix'`outcome' i.Study_Arm `ctrls', a(group), `ifstatement';
					matrix define p_`ri_pval' =r(p);
					local p_`ri_pval' = p_`ri_pval'[1,1];

				restore;
			};

		*main regressions;
			areg `prefix'`outcome' `regressors' `ctrls', a(group) cluster(School_Code), `ifstatement';

			local b_C_vs_FC = round(_b[MT_Program],0.001);
			local se_C_vs_FC = round(_se[MT_Program],0.001);
			local b_C_vs_RC = round(_b[CCT_Program],0.001);
			local se_C_vs_RC = round(_se[CCT_Program],0.001);

		*other stats for table;
			local adjr2 = round(e(r2_a),0.001);
			local N = e(N);

		*difference between treatment effects;
			lincom MT_Program-CCT_Program;
			local b_RC_vs_FC = round(r(estimate),0.001);
			local se_RC_vs_FC = round(r(se),0.001);

		*control mean and SD, on the estimation sample of the main regression;
			sum `prefix'`outcome_raw' if Study_Arm==0 & e(sample);
			local mean = round(r(mean),0.001);
			local sd = round(r(sd),0.001);

		*fix decimal places;
		*every statistic is shown with exactly three decimals and a leading zero;
		*a fixed numeric format is used instead of padding and cutting text, so;
		*whole numbers other than 0 and 1 keep all their digits and a missing;
		*value stays a single dot instead of being shown as .000;
			local stats
				b_C_vs_FC se_C_vs_FC p_C_vs_FC
				b_C_vs_RC se_C_vs_RC p_C_vs_RC
				adjr2
				b_RC_vs_FC se_RC_vs_FC p_RC_vs_FC
				mean sd;

			foreach stat of local stats{;
				local `stat' = trim(string(round(``stat'',0.001),"%20.3f"));
				*never print a signed zero;
				if "``stat''"=="-0.000"{;
					local `stat' 0.000;
				};
			};

		*compute significance stars and add to values;
			*also put brackets and parentheses on results;
			*stars are based on the randomization inference p-values;
			local ri_pvals C_vs_FC C_vs_RC RC_vs_FC;
			foreach ri_pval of local ri_pvals{;
				local stars_`ri_pval'="";
				if `p_`ri_pval''<0.1{;
					local stars_`ri_pval'="*";
				};
				if `p_`ri_pval''<0.05{;
					local stars_`ri_pval'="**";
				};
				if `p_`ri_pval''<0.01{;
					local stars_`ri_pval'="***";
				};

				*tack stars onto difference estimates;
				local b_`ri_pval'="`b_`ri_pval''"+"`stars_`ri_pval''";

				*parentheses and brackets for SEs and p-values;
				local se_`ri_pval'="("+"`se_`ri_pval''"+")";
				local p_`ri_pval'="["+"`p_`ri_pval''"+"]";

			};

		*write the column into the workbook;
		*the data in memory are reused as a scratch table with one row per statistic;
		preserve;

			local letter_column = char(`col'+65);
			local outfile "${dir}/`test' `version' Table ${S_DATE}`type'${year}`sample'.xlsx";

			local output_stats
				outcome
				b_C_vs_FC se_C_vs_FC p_C_vs_FC
				b_C_vs_RC se_C_vs_RC p_C_vs_RC
				N adjr2
				b_RC_vs_FC se_RC_vs_FC p_RC_vs_FC
				mean sd;

			local n_stats : word count `output_stats';

			keep if _n<=`n_stats';
			keep Pupil_ID;
			gen stat = "";
			gen value = "";
			drop Pupil_ID;

			forvalues j = 1/`n_stats'{;

				local this_stat : word `j' of `output_stats';
				replace stat = "`this_stat'" if _n==`j';
				replace value = "``this_stat''" if _n==`j';

			};

			*each group of rows goes to its own starting cell in the template;
			export excel value using "`outfile'" if inrange(_n,1,1), cell(`letter_column'3) sheetmodify;

			export excel value using "`outfile'" if inrange(_n,2,4), cell(`letter_column'4) sheetmodify;

			export excel value using "`outfile'" if inrange(_n,5,7), cell(`letter_column'9) sheetmodify;

			export excel value using "`outfile'" if inrange(_n,8,9), cell(`letter_column'13) sheetmodify;

			export excel value using "`outfile'" if inrange(_n,10,12), cell(`letter_column'16) sheetmodify;

			export excel value using "`outfile'" if inrange(_n,13,14), cell(`letter_column'21) sheetmodify;

		restore;

	};
	*end block to do quietly;

end;


	

*build one results workbook per test, specification and sample;
*each workbook starts as a copy of the blank template and is filled one column at a time by run_regs;
local tests EGRA EGWA OE;
local types ANCOVA Raw;
local samples all No_VHP_Cell;

use "${dta_dir}/1_Pupil_Data.dta", clear;

foreach test of local tests{;

	*component scores reported for this exam;
	if "`test'"=="OE"{;
		local comps T1_V_Total T1_C_Total T2A_V_Total T2A_S_Total T2B_V_Total T2B_S_Total T3_V_Total T3_P_Total T4_W_Total;
	};
	else if "`test'"=="EGWA"{;
		local comps AN EN ide org voi wor sen con Pres;
	};
	else{;
		local comps LN_Total IS_Total FW_Total IW_Total ORF_Total RC_Total;
	};

	*column order: pca index, then each component, then the simple index;
	*ys holds the outcome stubs and raws the matching raw-score stubs;
	local ys `test'_PCA_Index;
	local raws `test'_PCA_Index;
	foreach comp of local comps{;
		local ys `ys' `test'_`comp'_control_normed;
		local raws `raws' `test'_`comp';
	};
	local ys `ys' `test'_Simple_Index;
	local raws `raws' `test'_Simple_Index;
	local n_cols : word count `ys';

	foreach type of local types{;

		foreach sample of local samples{;

			*fresh workbook for this combination;
			copy "${code_dir}/blank_template.xlsx" "${dir}/`test' `version' Table ${S_DATE}`type'${year}`sample'.xlsx", replace;

			*one run_regs call per column, numbered from 1;
			forvalues col = 1/`n_cols'{;
				local y : word `col' of `ys';
				local yraw : word `col' of `raws';
				run_regs, outcome(`y') outcome_raw(`yraw') test(`test') type(`type') sample(`sample') col(`col');
			};

		};
		*end loop over samples;

	};
	*end loop over types;

};
*end loop over tests;
	

	
#delimit;
*set trace on;
*Lee bounds and wild cluster bootstrap p-values;
*for every test and score this section writes four trimmed regressions with outreg2;
*and three wild cluster bootstrap p-values into a copy of the bootstrap template;


use "${dta_dir}/1_Pupil_Data_all.dta", clear;


*do these for each test;
local tests EGRA EGWA OE;

foreach test of local tests{;

	*specify components based on exam, with the pca index first;
	if "`test'"=="OE"{;
		local comps PCA_Index T1_V_Total T1_C_Total T2A_V_Total T2A_S_Total T2B_V_Total T2B_S_Total T3_V_Total T3_P_Total T4_W_Total;
	};
	else if "`test'"=="EGWA"{;
		local comps PCA_Index AN EN ide org voi wor sen con Pres;
	};
	else{;
		local comps PCA_Index LN_Total IS_Total FW_Total IW_Total ORF_Total RC_Total;
	};

	*remove earlier output because outreg2 appends to these files;
	foreach bound in "high FC" "low FC" "high RC" "low RC"{;
		capture erase "${dir}/Lee Bounds `bound' `test' ${S_DATE}.xls";
		capture erase "${dir}/Lee Bounds `bound' `test' ${S_DATE}.txt";
	};

	capture erase "${dir}/Wild Bootstrap `test' ${S_DATE}.xlsx";
	copy "${code_dir}/Wild bootstrap template.xlsx" "${dir}/Wild Bootstrap `test' ${S_DATE}.xlsx";
	putexcel set "${dir}/Wild Bootstrap `test' ${S_DATE}.xlsx", modify;

	*ncol is raised before each write, so the first score lands in column B;
	local ncol 1;


	foreach comp of local comps{;

		*the pca index is used as is, every other score in its control_normed version;
		local score `test'_`comp'_control_normed;
		if "`comp'"=="PCA_Index"{;
			local score `test'_`comp';
		};

		preserve;

			capture drop has_followup;

			gen has_followup = !mi(EL_`score');

			*share of each arm with a non-missing endline score;
			sum has_followup if Study_Arm==2;
			scalar FCnoattrit = r(mean);
			sum has_followup if Study_Arm==1;
			scalar RCnoattrit = r(mean);
			sum has_followup if Study_Arm==0;
			scalar Cnoattrit = r(mean);

			*empirical cdf of the endline score within each arm;
			cumul EL_`score' if Study_Arm==2, gen(FCcumscore);
			cumul EL_`score' if Study_Arm==1, gen(RCcumscore);
			cumul EL_`score' if Study_Arm==0, gen(Ccumscore);

			*trimming rule for each treatment arm;
			*a non-negative cutoff trims the control arm, a negative one trims the treatment arm;
			*the other two arms are always kept in full;
			*NOTE(review): the share divides by the treatment-arm rate in both cases. Lee 2009 divides by the rate of the arm being trimmed - confirm this is intended;
			foreach arm in FC RC{;

				local cut = (Cnoattrit-`arm'noattrit )/`arm'noattrit;
				display "`arm' cutoff =";
				display `cut';
				local abscut = abs(`cut');

				*Study_Arm code of the treatment arm that is not being bounded;
				local other 2;
				if "`arm'"=="FC"{;
					local other 1;
				};

				if `cut'>=0{;
					local if_high_`arm' (Ccumscore<= 1-`abscut' |inlist(Study_Arm,1,2));
					local if_low_`arm' (Ccumscore>= `abscut' |inlist(Study_Arm,1,2));
				};
				else if `cut'<0{;
					local if_low_`arm' (`arm'cumscore<= 1-`abscut' |inlist(Study_Arm,`other',0));
					local if_high_`arm' (`arm'cumscore>= `abscut' |inlist(Study_Arm,`other',0));
				};

			};


			*New bounded regressions;
			*order is high FC, low FC, high RC, low RC, each appended to its own file;
			*NOTE(review): the if qualifier follows the options behind a second comma - confirm this placement parses as intended;
			foreach arm in FC RC{;
				foreach side in high low{;
					areg EL_`score' FC_Program RC_Program BL_`score', a(group) cluster(School_Code), if `if_`side'_`arm'';
					outreg2 `arm'_Program using "${dir}/Lee Bounds `side' `arm' `test' ${S_DATE}.xls",
						adjr2 dec(3) fmt(fc) nocons excel keep(`arm'_Program) sortvar(`arm'_Program) append;
				};
			};

			capture drop has_followup;

			keep if longitudinal_sample;

			areg EL_`score' FC_Program RC_Program BL_`score', a(group) cluster(School_Code);
			boottest {FC_Program=0} {RC_Program=0} {FC_Program==RC_Program}, noci reps(1000) seed(100453) weighttype(webb);

			*store the three p-values straight away, before any other command can replace r();
			*a fixed three-decimal format gives 1.000 and 0.000 at the extremes;
			*gluing a zero in front of the raw value gave 01 and 00 there;
			forvalues k = 1/3{;
				local boot_p`k' = trim(string(r(p_`k'),"%20.3f"));
			};

			local ncol = `ncol'+1;
			local col: word `ncol' of `c(ALPHA)';

			putexcel `col'3 = "EL_`score'";
			putexcel `col'5 = "[`boot_p1']";
			putexcel `col'7 = "[`boot_p2']";
			putexcel `col'9 = "[`boot_p3']";

		restore;

	};
	*end loop over components;

	*drop the txt files left next to the xls files;
	foreach bound in "high FC" "low FC" "high RC" "low RC"{;
		capture erase "${dir}/Lee Bounds `bound' `test' ${S_DATE}.txt";
	};

};
*end loop over tests;



#delimit;
*attrition analysis: does baseline information predict leaving the longitudinal sample;
use "${dta_dir}/1_Pupil_Data_all.dta", clear;

*indicator equal to one when longitudinal_sample is zero;
gen attritted = longitudinal_sample==0;


*outreg2 appends, so start from clean files;
capture erase "${dir}/Attrition Predictors ${S_DATE}.txt";
capture erase "${dir}/Attrition Predictors ${S_DATE}.xls";

*test predictors of presence at endline;
	*local predictors Female Age BL_EGRA_PCA_Index BL_EGWA_PCA_Index BL_OE_PCA_Index;
	*just use same predictors as in Appendix Table A2;
	*doesn't matter for results;
	local predictors Female Age BL_EGRA_PCA_Index BL_EGWA_PCA_Index;


	*one regression per study arm, in the order 0, 1, 2, each appended as a new column;
	forvalues arm = 0/2{;
		reg attritted `predictors', cluster(School_Code), if Study_Arm==`arm';
		outreg2 `predictors'
			using "${dir}/Attrition Predictors ${S_DATE}.xls",
			adjr2 dec(3) fmt(fc) nocons excel keep(`predictors')
			append;
	};

	*pooled regression with every predictor interacted with both program dummies;
	*rhs lists each predictor followed by its two interactions, then the two dummies;
	local rhs;
	foreach x of local predictors{;
		gen `x'_X_MT = `x'*MT_Program;
		gen `x'_X_CCT = `x'*CCT_Program;
		local rhs `rhs' `x' `x'_X_MT `x'_X_CCT;
	};
	local rhs `rhs' MT_Program CCT_Program;

	reg attritted `rhs', cluster(School_Code);
	outreg2 `rhs'
		using "${dir}/Attrition Predictors ${S_DATE}.xls",
		adjr2 dec(3) fmt(fc) nocons excel keep(`rhs')
		sortvar(`rhs') append;

*drop the txt file left next to the xls file;
capture erase "${dir}/Attrition Predictors ${S_DATE}.txt";


*power calcs for small numbers of clusters;
*the semicolon delimiter is still in force here, so every command below is ended with one;
*without the terminators the comment above ran on to the end of the file and nothing was executed;
*each trio re-estimates one outcome, bootstraps the three hypotheses at level 99.5 and prints a difference;
*NOTE(review): the printed numbers are hard coded, presumably the upper minus the lower limit of an interval from an earlier run - confirm and update after rerunning;
areg EL_EGRA_PCA_Index FC_Program RC_Program BL_EGRA_PCA_Index, a(group) cluster(School_Code);
boottest {FC_Program=0} {RC_Program=0} {FC_Program==RC_Program}, reps(1000) seed(100453) weighttype(webb) level(99.5);
di .6349642-(-.004056);
areg EL_EGRA_LN_Total_control_normed FC_Program RC_Program BL_EGRA_LN_Total , a(group) cluster(School_Code);
boottest {FC_Program=0} {RC_Program=0} {FC_Program==RC_Program}, reps(1000) seed(100453) weighttype(webb) level(99.5);
di 1.022229-.2343;
areg EL_EGWA_PCA_Index FC_Program RC_Program BL_EGWA_PCA_Index, a(group) cluster(School_Code);
boottest {FC_Program=0} {RC_Program=0} {FC_Program==RC_Program}, reps(1000) seed(100453) weighttype(webb) level(99.5);
di .4034901-(-.2167);


